{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DP Tensor Experiment - v1\n",
    "\n",
    "Purpose: to evaluate whether it's possible to build an automatic sensitivity calculation mechanism which could reliably produce sensitivity scores using only a forward pass through a dynamic graph.\n",
    "\n",
    "Conclusions:\n",
    "- it can be done!\n",
    "- the current model has very loose assumptions - namely that every operation is always using exclusively unique individuals. This is not true in practice and can lead to sensitivity estimations that are higher than they should be\n",
    "- related to this last point - this DPTensor just keeps track of one sensitivity measure per datapoint. However, each datapoint is often sensitive to multiple different individuals (at different levels of sensitivity). Thus, it would be far more appropriate to keep track of the list of individuals who have contributed to every datapoint, along with each individual's corresponding level of sensitivity.\n",
    "- If we can do the last point - then we can be much more sophisticated about when downstream operations cancel out sensitivity created by upstream operations - which at present we cannot do (as exemplified by the subtraction example near the end of this notebook)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "class DPTensor():\n",
    "    \"\"\"Array wrapper that carries element-wise value bounds through a forward pass.\n",
    "\n",
    "    Each operation returns a new DPTensor whose max_values / min_values are\n",
    "    derived from the bounds of its operands; `sensitivity` is the element-wise\n",
    "    width of that interval.\n",
    "\n",
    "    NOTE: __add__ and __sub__ assume that the entities behind the two operands\n",
    "    are all DIFFERENT, so bounds never cancel (x - x can still report a\n",
    "    non-zero sensitivity).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, data, max_values, min_values):\n",
    "        \"\"\"Store the values together with their element-wise bounds.\n",
    "\n",
    "        Args:\n",
    "            data: array of values.\n",
    "            max_values: element-wise upper bounds for `data`.\n",
    "            min_values: element-wise lower bounds for `data`.\n",
    "        \"\"\"\n",
    "        self.data = data\n",
    "        self.max_values = max_values\n",
    "        self.min_values = min_values\n",
    "\n",
    "    def _as_dptensor(self, other):\n",
    "        \"\"\"Wrap a Python or NumPy scalar as a constant DPTensor shaped like self.data.\n",
    "\n",
    "        Anything that is not a scalar is returned unchanged.\n",
    "        \"\"\"\n",
    "        if isinstance(other, (float, int, np.number)):\n",
    "            # a constant has no uncertainty: both bounds equal the value itself\n",
    "            _data = np.zeros_like(self.data) + other\n",
    "            return DPTensor(data=_data, max_values=_data, min_values=_data)\n",
    "        return other\n",
    "\n",
    "    def minimum(self, other):\n",
    "        \"\"\"Element-wise minimum of self and `other` (a DPTensor or a scalar).\n",
    "\n",
    "        For x in [a, b] and y in [c, d], min(x, y) lies in [min(a, c), min(b, d)],\n",
    "        so both bounds are the element-wise minimum of the operands' bounds.\n",
    "\n",
    "        Returns:\n",
    "            A new DPTensor holding the clamped data and bounds.\n",
    "        \"\"\"\n",
    "        other = self._as_dptensor(other)\n",
    "\n",
    "        _new_data = np.minimum(self.data, other.data)\n",
    "        _new_max_values = np.minimum(self.max_values, other.max_values)\n",
    "        # the lower bound has to follow `other` as well; keeping self.min_values\n",
    "        # would leave min_values above max_values whenever `other` is smaller\n",
    "        _new_min_values = np.minimum(self.min_values, other.min_values)\n",
    "\n",
    "        return DPTensor(data=_new_data,\n",
    "                        max_values=_new_max_values,\n",
    "                        min_values=_new_min_values)\n",
    "\n",
    "    def maximum(self, other):\n",
    "        \"\"\"Element-wise maximum of self and `other` (a DPTensor or a scalar).\n",
    "\n",
    "        For x in [a, b] and y in [c, d], max(x, y) lies in [max(a, c), max(b, d)],\n",
    "        so both bounds are the element-wise maximum of the operands' bounds.\n",
    "\n",
    "        Returns:\n",
    "            A new DPTensor holding the clamped data and bounds.\n",
    "        \"\"\"\n",
    "        other = self._as_dptensor(other)\n",
    "\n",
    "        _new_data = np.maximum(self.data, other.data)\n",
    "        _new_min_values = np.maximum(self.min_values, other.min_values)\n",
    "        # the upper bound has to follow `other` as well; keeping self.max_values\n",
    "        # would leave max_values below min_values whenever `other` is larger\n",
    "        _new_max_values = np.maximum(self.max_values, other.max_values)\n",
    "\n",
    "        return DPTensor(data=_new_data,\n",
    "                        max_values=_new_max_values,\n",
    "                        min_values=_new_min_values)\n",
    "\n",
    "    def __add__(self, other):\n",
    "        \"\"\"Element-wise sum of two DPTensors.\n",
    "\n",
    "        NOTE: This assumes that all entities in self.data and other.data are DIFFERENT\n",
    "        \"\"\"\n",
    "        _data = self.data + other.data\n",
    "\n",
    "        # remember, it's not about the maximum value that .data could take on\n",
    "        # (which would be self.max_values + other.max_values), it's about the\n",
    "        # maximum amount that .data could CHANGE if an entity is removed.\n",
    "        _max_values = np.maximum(self.max_values, other.max_values)\n",
    "        _min_values = np.minimum(self.min_values, other.min_values)\n",
    "\n",
    "        return DPTensor(data=_data,\n",
    "                        max_values=_max_values,\n",
    "                        min_values=_min_values)\n",
    "\n",
    "    def __neg__(self):\n",
    "        \"\"\"Element-wise negation; the bounds are negated and swap roles.\"\"\"\n",
    "        _data = -self.data\n",
    "\n",
    "        # negating [min, max] yields [-max, -min]\n",
    "        return DPTensor(data=_data,\n",
    "                        max_values=-self.min_values,\n",
    "                        min_values=-self.max_values)\n",
    "\n",
    "    def __sub__(self, other):\n",
    "        \"\"\"Element-wise difference, computed as (-other) + self.\n",
    "\n",
    "        NOTE: This assumes that all entities in self.data and other.data are DIFFERENT\n",
    "        \"\"\"\n",
    "        return (-other) + self\n",
    "\n",
    "    @property\n",
    "    def sensitivity(self):\n",
    "        \"\"\"Element-wise width of the tracked interval: max_values - min_values.\"\"\"\n",
    "        return self.max_values - self.min_values\n",
    "\n",
    "\n",
    "class DatasetTensor(DPTensor):\n",
    "    \"\"\"DPTensor created from raw data, additionally storing `entities` and `epsilon`.\n",
    "\n",
    "    Bounds that are not supplied default to +inf (upper) and -inf (lower) for\n",
    "    every element of `data`.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, data, entities, epsilon, max_values=None, min_values=None):\n",
    "        \"\"\"Store data, entities and epsilon, filling in missing bounds.\n",
    "\n",
    "        Args:\n",
    "            data: array of values.\n",
    "            entities: stored unchanged on the instance.\n",
    "            epsilon: stored unchanged on the instance.\n",
    "            max_values: optional upper bounds; +inf everywhere when None.\n",
    "            min_values: optional lower bounds; -inf everywhere when None.\n",
    "        \"\"\"\n",
    "        upper = max_values if max_values is not None else np.inf + np.zeros_like(data)\n",
    "        lower = min_values if min_values is not None else -np.inf + np.zeros_like(data)\n",
    "\n",
    "        # the parent constructor stores data and both bounds\n",
    "        super().__init__(data=data, max_values=upper, min_values=lower)\n",
    "\n",
    "        self.epsilon = epsilon\n",
    "        self.entities = entities\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one entity id per record: data and epsilon both hold 7 elements, so entities must too\n",
    "db = DatasetTensor(data=np.array([0.,1,1,0,1,1,0]), epsilon=np.zeros(7) + 0.1, entities=np.array(range(0,7)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "db2 = db.minimum(1).maximum(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., 1., 1., 1., 1.])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db2.max_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0., 0., 0., 0., 0.])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db2.min_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., 1., 1., 1., 1.])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db2.sensitivity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "db3 = -db2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0., -0., -0., -0., -0., -0., -0.])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db3.max_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-1., -1., -1., -1., -1., -1., -1.])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db3.min_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., 1., 1., 1., 1.])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db3.sensitivity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "db4 = db3 + db2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2., 2., 2., 2., 2., 2., 2.])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db4.sensitivity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [],
   "source": [
    "db3 = (db2 - db2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0., 0., 0., 0., 0.])"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db3.data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., 1., 1., 1., 1.])"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db3.max_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0., 0., 0., 0., 0.])"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db2.min_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
